import scrapy
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule
from turorial.items import SuncrawlspiderItem


class SunSpider(CrawlSpider):
    """Crawl the paginated "politicsNewest" listing on wz.sun0769.com.

    Starting from ``start_urls``, the single crawl rule extracts every link
    whose URL matches the listing's pagination pattern, hands the response
    to :meth:`parse_item`, and keeps following pagination links found there.
    """

    name = 'sun'
    allowed_domains = ['wz.sun0769.com']
    start_urls = ['http://wz.sun0769.com/political/index/politicsNewest?id=1&page=0']

    rules = (
        # Pagination links only: "?id=1&page=<number>". follow=True makes the
        # crawler keep extracting further pagination links from each page.
        Rule(LinkExtractor(allow=r'/political/index/politicsNewest\?id=1&page=\d+'), callback='parse_item', follow=True),
    )

    def parse_item(self, response):
        """Yield one ``SuncrawlspiderItem`` per ``<li>`` row of a listing page.

        Args:
            response: The downloaded listing page.

        Yields:
            SuncrawlspiderItem: One item per ``<li>`` under
            ``ul.title-state-ul``, with ``problem_id``, ``problem_title`` and
            ``problem_time`` filled from the row's ``state1``, ``state3`` and
            ``state5`` spans. A field is ``None`` when its XPath matches
            nothing in that row.
        """
        problems = response.xpath('//ul[@class="title-state-ul"]/li')
        for problem in problems:
            # Build a fresh item for every row. Reusing a single instance
            # would make every yielded item alias the same object, so any
            # consumer that holds on to items (e.g. an asynchronous pipeline)
            # would see earlier rows overwritten by later ones.
            item = SuncrawlspiderItem()
            item['problem_id'] = problem.xpath('span[@class="state1"]/text()').get()
            item['problem_title'] = problem.xpath('span[@class="state3"]/a/text()').get()
            item['problem_time'] = problem.xpath('span[@class="state5"]/text()').get()
            yield item
